discard

set more off

* Load the merged wave-7 health file. The healthsave global must already
* hold the folder the data live in (it is not defined in this file).
use "$healthsave\merged health data_w7.dta", clear


	*----------------------------------*
	* core sample member flag per wave *
	*----------------------------------*

		* finstat codes that count as core sample membership
		local corecodes 1 2 7 8 14 15 25 28

		label define core 0 "Non-core" 1 "Core"

		* core_wN is 1 when finstatwN equals any of the codes above, else 0
		forvalues w = 1/7 {
			egen core_w`w' = anymatch(finstatw`w'), values(`corecodes')
			label values core_w`w' core
			label variable core_w`w' "Core sample member in wave `w'"
		}
		
	
	
	*--------------------------------------------*
	*   first and last wave with a successful    *
	*   interview (basis for attrition/re-entry) *
	*--------------------------------------------*

		* firstwave: lowest wave number N with inwaveN == 1, or 0 when the
		* respondent has no wave with inwaveN == 1. Start at wave 7 and walk
		* backwards so that earlier waves overwrite later ones.
		gen firstwave = 7*(inwave7==1)
			lab define firstwave 7 "Wave 7"

		foreach i of numlist 6 5 4 3 2 1 {
			replace firstwave = `i' if inwave`i' == 1
			lab define firstwave `i' "Wave `i'" , add
		}

		lab var firstwave "First successful interview wave"
		lab val firstwave firstwave

		* lastwave: highest wave number N with inwaveN == 1, or 0 when none.
		* Built from the logical test (inwave1==1) rather than a raw copy of
		* inwave1, mirroring firstwave: a missing or non-0/1 inwave1 must not
		* leak into lastwave, because a missing lastwave compares as larger
		* than every wave number in later >= tests.
		gen lastwave = (inwave1==1)
			lab define lastwave 1 "Wave 1"

		forv i = 2/7 {
			replace lastwave = `i' if inwave`i' == 1
			lab define lastwave `i' "Wave `i'" , add
		}

		lab var lastwave "Last successful interview wave"
		lab val lastwave lastwave

		
		** note: there seems to be some disagreement between the index file and
		** 		 the HSE data - HSE data is available for some who are
		** 		 apparently not matched according to the index file.
		** 		 We trust the data rather than the index file.
		
	
	*---------------------------------------------------------------*
	* construct summary indicator for which waves respondent was in *
	*---------------------------------------------------------------*

		* Coding of inwave:
		*   - no re-entry : 10 x firstwave + lastwave (14 = first wave 1, last wave 4)
		*   - re-entrants : 9 followed by seven 1/0 digits, one per wave 1-7
		* The re-entrant base value is kept in one local so that the value
		* assigned and the threshold tested below cannot drift apart.
		local reentry_base = 90000000

		gen long inwave  = (10 * firstw) + lastw

		** re-entrants: present in wave i, recorded absent (== 0) in wave i-1,
		** and first interviewed before wave i. The loop starts at wave 3
		** because a gap needs at least one earlier wave before the absent one.
			forvalues i=3/7 {
				local Pi=`i'-1
				quietly: replace inwave = `reentry_base' 			///
					if `i'>firstw 									///
						& inwave`Pi'==0  & inwave`i' ==1
			}

		* for re-entrants only, switch on one digit per wave attended
		* (wave 1 is the leftmost of the seven digits)
		forv i = 1/7 {
			replace inwave = inwave + 10^(7-`i') 			///
			if inwave`i'==1 & inwave>=`reentry_base'
		}

		* Stata variable labels are limited to 80 characters, so the label is
		* kept short and the full description is attached as a note.
		label var inwave 	///
			"First & last wave (2 digits), or 9 + seven 1/0 wave flags for re-entrants"
		notes inwave: Values take first and last observed wave, or 9 followed by 1/0 every wave present/not-present (re-entrants)

	** alive in a particular wave

		* alive_wN = 1 when wave N lies between the first and last interviewed
		* wave (inclusive). A missing lastwave would satisfy >= on its own
		* (missing compares as larger than any number), so it is excluded.
		forv i = 1/7 {
			gen alive_w`i' = firstw <= `i' & lastw >= `i' & !missing(lastw)
			label var alive_w`i' "Alive in wave `i': interviewed then, or both before and after"
			notes alive_w`i': Alive in this wave - indicates either presence in current wave or both in past and future waves
		}
	
	
	
	/** documentation tables: wave participation and average age by wave **/

		* all tables below are written relative to this folder; the change of
		* working directory stays in effect after this section
		cd "P:\ELSA\Healthdynamics\documentation\tables\"

		gen n = 1
		tabout firstwave lastwave using first_and_last_wave_w7.txt, replace
		tabout inwave using numbers_in_each_wave_w7.txt, replace
		drop n

		* cells reported for each wave: N, mean age, mean of recoded sex
		local agecells count agew mean agew mean sex

		preserve

			keep idauniq sex agew? inwave*

			* bp flags the summary code 16, i.e. first wave 1 and last wave 6
			* without re-entry.
			* NOTE(review): in a seven-wave file a fully balanced panel would
			* be code 17 - confirm that 16 is still the intended value.
			gen bp = inwave==16
			drop inwave

			* sex becomes 0 for original code 1 and 1 for original code 2, so
			* its mean is the share with code 2 (presumably female - confirm)
			recode sex (1=0) (2=1)

			* one row per respondent-wave, keeping only waves actually attended
			reshape long inwave agew , i(idauniq) j(wave)
			keep if inwave==1

			tabout wave using average_age_in_each_wave_w7.txt , 	///
				c(`agecells') f(4) sum replace
			tabout wave if bp==1 using average_age_in_each_wave_bp_w7.txt, 	///
				c(`agecells') f(4) sum replace

		restore
			
		
	
	/* give interview year and month the same names in every wave */

		* waves 1-2 use the stem iintdt, waves 3-7 use the stem iintdat
		forvalues w = 1/7 {
			if `w' <= 2 {
				local stem iintdt
			}
			else {
				local stem iintdat
			}
			rename `stem'yw`w' intyear_w`w'
			rename `stem'mw`w' intmonth_w`w'
		}


/* monthly interview date for each wave */

	forvalues w = 1/7 {
		gen intdate_w`w' = ym(intyear_w`w' , intmonth_w`w')
		format intdate_w`w' %tm
		label variable intdate_w`w' "Wave `w' interview date (realised or virtual)"
	}


/* months elapsed between consecutive waves: intdiff1_2 up to intdiff6_7 */

	forvalues w = 1/6 {
		local nxt = `w' + 1
		gen intdiff`w'_`nxt' = intdate_w`nxt' - intdate_w`w'
	}

	
/* documentation table: gaps in months between consecutive interviews */

	* NOTE(review): intdiff6_7 is created earlier in this file but is not
	* tabulated here, and the output name has no _w7 suffix unlike the other
	* tables - confirm both are intended.
	local gapvars intdiff1_2 intdiff2_3 intdiff3_4 intdiff4_5 intdiff5_6

	cd "P:\ELSA\Healthdynamics\documentation\tables\"

	* n is a constant added as the last tabulated variable and removed again
	gen n = 1
	tabout `gapvars' n 	///
		using differences_in_interview_times.txt , c(freq) replace
	drop n
	
	
	
/** THIS NEXT BIT OF CODE TAKES AN IRRITATINGLY LONG TIME TO RUN. 
	I HAVE THEREFORE SEPARATED IT OUT FROM THE REST OF THE CODE 
	BUT IF EVER NEW PEOPLE ARE ADDED TO ELSA IT WILL NEED TO BE RE-RUN **/
	
	
	

/* impute difference in interview times */
	* The distribution of gaps differs between pairs of waves, so each pair
	* is handled separately. Observed gaps below the 10th or above the 90th
	* percentile are first pulled in to those percentiles, so the imputation
	* works from the inner 80% of the distribution.
	* imputesk is not defined in this file; presumably it creates the
	* dvintdiff variables kept below - confirm against its source.
	* NOTE(review): no random seed is set in this section - confirm whether
	* imputesk handles reproducibility itself.

	preserve

	forvalues w = 1/6 {
		local nxt = `w' + 1
		local gap intdiff`w'_`nxt'

		* capture the percentiles straight away, before any other command runs
		quietly summarize `gap', detail
		local lo `r(p10)'
		local hi `r(p90)'

		replace `gap' = `lo' if `gap'<`lo'
		replace `gap' = `hi' if `gap'>`hi' & `gap'!=.
		imputesk `gap' , id(idauniq)
	}

	* store only the id and the imputed gaps, then return to the full data
	keep idauniq dvintdiff???
	save "$healthsave\random interview date differences_w7.dta", replace

	restore

	* attach the imputed gaps; the resulting _merge variable stays in the data
	capture drop _merge
	merge 1:1 idauniq using "$healthsave\random interview date differences_w7.dta"
	
	
/** fill in missing interview dates as previous wave date
	plus the imputed gap in months **/

forvalues w = 2/7 {
	local prev = `w' - 1

		* fill only when this wave has no date but the previous wave does;
		* going through the waves in order lets a filled date feed the next one
		replace intdate_w`w' = intdate_w`prev' + dvintdiff`prev'_`w' 	///
			if intdate_w`w'==. & intdate_w`prev'!=.

		* scratch variables: daily date, then its calendar year and month
		gen tmp`w' = dofm(intdate_w`w')
		format tmp`w' %td
		gen tmpyr`w' = year(tmp`w')
		gen tmpmonth`w' = month(tmp`w')

		* recorded year and month are kept; only missing ones are filled
		replace intyear_w`w'  =  tmpyr`w' 	 if intyear_w`w' == .
		replace intmonth_w`w' =  tmpmonth`w' if intmonth_w`w' == .
}

	* remove the raw gaps and the scratch variables (dvintdiff variables stay)
	drop intdiff* tmpyr? tmpmonth? tmp?
	
	
	/** fill in ages based on filled in interview dates **/

		* month of birth as a monthly date; year() and month() are applied to
		* dob directly, so dob is assumed to be a daily date - TODO confirm
		gen mbday = ym(year(dob), month(dob))
		format mbday %tm

		* mageN is age in months at the wave N interview date.
		* Waves 1-6: only missing ages are filled in.
		* Wave 7: age is overwritten for everyone, since no wave 7 interview
		* dates are available yet and the date used is therefore the virtual one.
		forvalues w = 1/7 {
			gen mage`w' = intdate_w`w' - mbday
			if `w' < 7 {
				replace agew`w' = floor(mage`w'/12) if agew`w' == .
			}
			else {
				replace agew`w' = floor(mage`w'/12)
			}
		}

	** note that all those who are ever observed PRIOR to
	** a wave will have their virtual interview date/age imputed.

	save "$healthsave\merged health data cleaned_w7.dta", replace
